In [1]:
# load libraries
import numpy as np
import pandas as pd 
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
# One-time NLTK resource downloads — uncomment on first run:
# nltk.download('vader_lexicon')
# nltk.download('punkt')
# nltk.download('averaged_perceptron_tagger')
# nltk.download('universal_tagset')
# nltk.download('sentiwordnet')
# nltk.download('wordnet')
# nltk.download('stopwords')
from tqdm.notebook import tqdm
from datetime import datetime
import re
import time
from nltk.corpus import sentiwordnet as swn
from nltk.tag import pos_tag,map_tag
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
import warnings
# NOTE(review): category=Warning silences ALL warnings, including pandas'
# SettingWithCopyWarning, which can hide real bugs (see the inplace
# drop_duplicates below) — consider narrowing to specific categories.
warnings. simplefilter(action='ignore', category=Warning) 

# for offline plotting
# ====================
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode()
In [2]:
# Load customer feedback data (Kaggle "Customer Support on Twitter" dataset).
# FIX: avoid a hardcoded, OS-specific, doubly-escaped absolute path inline;
# keep it in one named, easily-edited constant. Forward slashes are accepted
# by pandas on Windows as well.
from pathlib import Path  # ideally lives in the imports cell at the top

DATA_PATH = Path("D:/Google Cloud Storage/NLP sentiment analysis/twcs/twcs.csv")
df = pd.read_csv(DATA_PATH)
df.head()
Out[2]:
tweet_id author_id inbound created_at text response_tweet_id in_response_to_tweet_id
0 1 sprintcare False Tue Oct 31 22:10:47 +0000 2017 @115712 I understand. I would like to assist y... 2 3.0
1 2 115712 True Tue Oct 31 22:11:45 +0000 2017 @sprintcare and how do you propose we do that NaN 1.0
2 3 115712 True Tue Oct 31 22:08:27 +0000 2017 @sprintcare I have sent several private messag... 1 4.0
3 4 sprintcare False Tue Oct 31 21:54:49 +0000 2017 @115712 Please send us a Private Message so th... 3 5.0
4 5 115712 True Tue Oct 31 21:49:35 +0000 2017 @sprintcare I did. 4 6.0

--------------------------------------------------------Sentiment Analysis------------------------------------------------------


In [3]:
# Quick overview of the raw dataset: ~2.8M tweets, 7 columns
print('shape: ',df.shape)
print('Columns: ',df.columns)
shape:  (2811774, 7)
Columns:  Index(['tweet_id', 'author_id', 'inbound', 'created_at', 'text',
       'response_tweet_id', 'in_response_to_tweet_id'],
      dtype='object')
In [4]:
# Keep only inbound tweets, i.e. messages written BY customers
# (outbound rows are the company's replies).
inbound_mask = df['inbound']
customer_msg = df.loc[inbound_mask]
customer_msg.shape
Out[4]:
(1537843, 7)
In [5]:
# The dataset mixes support threads from many companies; keep only tweets
# that mention Sprint's support handle, for instance.
# FIX: na=False guards against NaN entries in 'text' (str.contains would
# otherwise propagate NaN and break boolean indexing); regex=False treats
# the pattern as a literal substring rather than a regular expression.
customer_msg = customer_msg[customer_msg['text'].str.contains("sprintcare", regex=False, na=False)]
print(customer_msg.shape)
customer_msg.head()
(13714, 7)
Out[5]:
tweet_id author_id inbound created_at text response_tweet_id in_response_to_tweet_id
1 2 115712 True Tue Oct 31 22:11:45 +0000 2017 @sprintcare and how do you propose we do that NaN 1.0
2 3 115712 True Tue Oct 31 22:08:27 +0000 2017 @sprintcare I have sent several private messag... 1 4.0
4 5 115712 True Tue Oct 31 21:49:35 +0000 2017 @sprintcare I did. 4 6.0
6 8 115712 True Tue Oct 31 21:45:10 +0000 2017 @sprintcare is the worst customer service 9,6,10 NaN
8 12 115713 True Tue Oct 31 22:04:47 +0000 2017 @sprintcare You gonna magically change your co... 11,13,14 15.0
In [6]:
# Count duplicates: whole-row duplicates vs. duplicate tweet texts only
print(len(customer_msg))                         # 13714 rows total
full_dup_mask = customer_msg.duplicated()
print(int(full_dup_mask.sum()))                  # 0 fully duplicated rows
text_dup_mask = customer_msg.duplicated(subset=['text'])
print(int(text_dup_mask.sum()))                  # 324 duplicate texts
13714
0
324
In [7]:
# Drop tweets whose text is an exact duplicate, keeping the first occurrence.
# FIX: the original called drop_duplicates(..., inplace=True) on a frame
# produced by boolean filtering — the classic chained-assignment pitfall
# (its SettingWithCopyWarning was hidden by the blanket warning filter).
# Reassigning the result is unambiguous and idempotent on re-run.
customer_msg = customer_msg.drop_duplicates(subset=['text']).reset_index(drop=True)
customer_msg.shape
Out[7]:
(13390, 7)
In [8]:
# Register tqdm with pandas so .progress_apply() below renders a progress bar
tqdm.pandas()
In [9]:
# BASIC CLEANING FUNCTION
def clean_text(text):
    """Strip Twitter artifacts from a tweet.

    Removes @-mentions, a leading retweet ("RT ") marker, URLs, and finally
    replaces every non-letter character (digits, '#', punctuation) with a
    space. Returns the cleaned string; letters and spacing are otherwise
    preserved.
    """
    txt = text
    txt = re.sub(r'@[A-Za-z0-9_:]+', '', txt)           # @username mentions
    # BUG FIX: the original pattern r'^[RT]+' stripped ANY leading run of
    # 'R'/'T' characters (e.g. "Thanks" -> "hanks", "Try..." -> "ry...").
    # Only the literal retweet marker at the start should be removed.
    txt = re.sub(r'^RT\s+', '', txt)                    # retweet marker
    txt = re.sub(r'https?://[A-Za-z0-9./]+', '', txt)   # URLs
    txt = re.sub(r'[^a-zA-Z]', ' ', txt)                # non-letters (incl. '#') -> space
    return txt

# Clean every tweet in place in the 'text' column (progress bar via tqdm)
customer_msg['text'] = customer_msg['text'].progress_apply(clean_text)
customer_msg.head()

Out[9]:
tweet_id author_id inbound created_at text response_tweet_id in_response_to_tweet_id
0 2 115712 True Tue Oct 31 22:11:45 +0000 2017 and how do you propose we do that NaN 1.0
1 3 115712 True Tue Oct 31 22:08:27 +0000 2017 I have sent several private messages and no o... 1 4.0
2 5 115712 True Tue Oct 31 21:49:35 +0000 2017 I did 4 6.0
3 8 115712 True Tue Oct 31 21:45:10 +0000 2017 is the worst customer service 9,6,10 NaN
4 12 115713 True Tue Oct 31 22:04:47 +0000 2017 You gonna magically change your connectivity ... 11,13,14 15.0
In [10]:
# Parse 'created_at' into proper datetimes and order messages chronologically,
# so downstream analysis follows the time the messages were created.
customer_msg = (
    customer_msg
    .assign(created_at=pd.to_datetime(customer_msg['created_at']))
    .sort_values(by='created_at')
)
In [11]:
# VADER sentiment analyzer from NLTK, wrapped in a small scoring helper
sentiment_analyzer = SentimentIntensityAnalyzer()

def sentiment_analyze(text: str) -> float:
    """Return VADER's compound polarity score for `text` (range -1..1)."""
    scores = sentiment_analyzer.polarity_scores(text)
    return scores['compound']
In [12]:
# Score every customer message; result stored in a new 'sentiment' column
customer_msg['sentiment'] = customer_msg['text'].progress_apply(sentiment_analyze)

In [13]:
# Tally messages by sentiment polarity and visualize as a bar chart
scores = customer_msg['sentiment']
temp = pd.DataFrame({
    'Sentiment': ['Positive', 'Neutral', 'Negative'],
    'No. of Customers': [int((scores > 0).sum()),
                         int((scores == 0).sum()),
                         int((scores < 0).sum())],
})
import plotly.express as px
fig = px.bar(temp, x='Sentiment', y='No. of Customers',
             color='Sentiment', title='Sprintcare customer sentiments')
fig.show()
In [14]:
# Group messages by customer (author_id); this GroupBy object is reused
# by both use cases below (mean and min sentiment per customer).
customer_grouped = customer_msg.groupby('author_id')

Use case 1

In [15]:
# Use case 1: compute each customer's AVERAGE sentiment over this data chunk
# and flag everyone at or below an alert threshold. Those customers are
# candidates for proactive outreach to find and resolve their issues.
author_sentiment_avg = customer_grouped['sentiment'].mean().sort_values()
author_sentiment_avg_df = author_sentiment_avg.reset_index()
alert_threshold_avg = -0.7
author_sentiment_avg_df[author_sentiment_avg_df['sentiment'] <= alert_threshold_avg]
Out[15]:
author_id sentiment
0 397327 -0.952000
1 494819 -0.941900
2 421239 -0.934800
3 467207 -0.930000
4 654664 -0.912400
5 586657 -0.912000
6 559476 -0.911800
7 547269 -0.910600
8 219151 -0.910000
9 348144 -0.908100
10 498176 -0.904200
11 221558 -0.901733
12 270562 -0.899900
13 459380 -0.899000
14 573385 -0.896400
15 795777 -0.894200
16 253999 -0.893300
17 339800 -0.892500
18 675169 -0.889800
19 663436 -0.889500
20 309266 -0.888500
21 544138 -0.888500
22 609363 -0.888500
23 174146 -0.880700
24 289306 -0.873800
25 148370 -0.873500
26 685906 -0.873000
27 572445 -0.871800
28 486713 -0.865800
29 768215 -0.863600
... ... ...
176 738127 -0.726900
177 200587 -0.726400
178 429848 -0.722700
179 370703 -0.722050
180 718255 -0.720000
181 774628 -0.718400
182 723344 -0.718400
183 235878 -0.718400
184 737289 -0.718400
185 606468 -0.718400
186 549325 -0.718400
187 427813 -0.718400
188 191382 -0.718400
189 518336 -0.718400
190 548582 -0.718400
191 334600 -0.718400
192 639534 -0.718400
193 207413 -0.709600
194 229325 -0.709600
195 499110 -0.708900
196 251140 -0.708900
197 526588 -0.706300
198 424262 -0.703400
199 157819 -0.700300
200 196571 -0.700300
201 686786 -0.700300
202 229243 -0.700300
203 659286 -0.700300
204 328195 -0.700300
205 756501 -0.700200

206 rows × 2 columns

In [16]:
# Five-point-summary box plot and distribution plot of the customers'
# negative AVERAGE sentiment scores.
import plotly.express as px
import plotly.figure_factory as ff

neg_avg = author_sentiment_avg_df[author_sentiment_avg_df['sentiment'] < 0]
fig = px.box(neg_avg, y="sentiment", points="all")
fig.show()

hist_data = [neg_avg['sentiment']]
group_labels = ['Dist. of average negative sentiments of customers'] # dataset name
fig = ff.create_distplot(hist_data, group_labels)
fig.show()

Use case 2

In [17]:
# Use case 2: find each customer's single WORST (minimum) sentiment score
# over this data chunk and flag everyone at or below an alert threshold —
# again, candidates for proactive outreach.
author_sentiment_lowest = customer_grouped['sentiment'].min().sort_values()
author_sentiment_lowest_df = author_sentiment_lowest.reset_index()
alert_threshold_min = -0.8
author_sentiment_lowest_df[author_sentiment_lowest_df['sentiment'] <= alert_threshold_min]
Out[17]:
author_id sentiment
0 221558 -0.9741
1 463642 -0.9594
2 164264 -0.9571
3 189882 -0.9554
4 766600 -0.9524
5 397327 -0.9520
6 684865 -0.9477
7 789866 -0.9457
8 494819 -0.9419
9 426589 -0.9400
10 486889 -0.9360
11 355443 -0.9360
12 421239 -0.9348
13 467207 -0.9300
14 414324 -0.9280
15 423257 -0.9274
16 808637 -0.9260
17 177117 -0.9260
18 822515 -0.9260
19 239757 -0.9251
20 150834 -0.9221
21 437302 -0.9209
22 298399 -0.9201
23 427804 -0.9200
24 181845 -0.9191
25 737386 -0.9186
26 136460 -0.9175
27 737951 -0.9169
28 757550 -0.9153
29 350669 -0.9148
... ... ...
211 187870 -0.8126
212 605050 -0.8126
213 736810 -0.8122
214 672851 -0.8122
215 177447 -0.8091
216 230916 -0.8087
217 591647 -0.8074
218 494100 -0.8074
219 520402 -0.8074
220 315735 -0.8074
221 810109 -0.8074
222 294448 -0.8074
223 370880 -0.8074
224 644451 -0.8074
225 466684 -0.8074
226 203830 -0.8047
227 338870 -0.8030
228 619674 -0.8023
229 166622 -0.8020
230 478297 -0.8020
231 134203 -0.8020
232 126203 -0.8020
233 576163 -0.8020
234 387505 -0.8020
235 217320 -0.8020
236 154080 -0.8020
237 128570 -0.8020
238 802547 -0.8020
239 424974 -0.8016
240 203121 -0.8016

241 rows × 2 columns

In [18]:
# Five-point-summary box plot and distribution plot of the customers'
# negative MINIMUM sentiment scores (the original comment said "average" —
# this cell actually plots the per-customer minimums).
import plotly.express as px
import plotly.figure_factory as ff

neg_min = author_sentiment_lowest_df[author_sentiment_lowest_df['sentiment'] < 0]
fig = px.box(neg_min, y="sentiment", points="all")
fig.show()

hist_data = [neg_min['sentiment']]
group_labels = ['Dist. of min. sentiment scores of customers'] # dataset name
fig = ff.create_distplot(hist_data, group_labels)
fig.show()

--------------------------------------------------------Topic Modelling------------------------------------------------------


In [19]:
# !pip install --upgrade pycaret
# !python -m spacy download en_core_web_sm
# !python -m textblob.download_corpora
In [20]:
# import pycaret's NLP module (provides setup, create_model, assign_model,
# plot_model, evaluate_model used unqualified below).
# NOTE(review): wildcard import pollutes the namespace; it is kept here
# because the following cells call the functions unqualified, but a
# namespaced import would be safer if refactoring.
from pycaret.nlp import *
In [21]:
# Inspect dtypes and non-null counts before topic modelling
customer_msg.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 13390 entries, 4969 to 2093
Data columns (total 8 columns):
tweet_id                   13390 non-null int64
author_id                  13390 non-null object
inbound                    13390 non-null bool
created_at                 13390 non-null datetime64[ns]
text                       13390 non-null object
response_tweet_id          9278 non-null object
in_response_to_tweet_id    10194 non-null float64
sentiment                  13390 non-null float64
dtypes: bool(1), datetime64[ns](1), float64(2), int64(1), object(3)
memory usage: 850.0+ KB
In [22]:
# Make sure the 'text' column holds plain unicode strings before
# handing the frame to pycaret's setup()
customer_msg['text'] = customer_msg['text'].astype(str)
customer_msg.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 13390 entries, 4969 to 2093
Data columns (total 8 columns):
tweet_id                   13390 non-null int64
author_id                  13390 non-null object
inbound                    13390 non-null bool
created_at                 13390 non-null datetime64[ns]
text                       13390 non-null object
response_tweet_id          9278 non-null object
in_response_to_tweet_id    10194 non-null float64
sentiment                  13390 non-null float64
dtypes: bool(1), datetime64[ns](1), float64(2), int64(1), object(3)
memory usage: 850.0+ KB
In [23]:
# initialize the pycaret NLP experiment on the cleaned tweets.
# NOTE(review): setup() applies pycaret's own default text preprocessing
# to the 'text' column before modelling — confirm it doesn't conflict
# with the manual cleaning done earlier.
nlp = setup(data = customer_msg, target = 'text')
Description Value
session_id 5137
Documents 13390
Vocab Size 5064
Custom Stopwords False
INFO - setup() succesfully completed......................................
In [24]:
# Train a 5-topic LDA model; multi_core=True enables parallel training.
# NOTE(review): num_topics=5 is a tuning choice — no coherence search was
# done here to justify it.
lda = create_model('lda', num_topics = 5, multi_core = True)
INFO - LdaModel(num_terms=5064, num_topics=5, decay=0.5, chunksize=100)
INFO - create_model() succesfully completed......................................
In [25]:
# Label every document with the trained model: assign_model() appends
# per-topic weight columns (Topic_0..Topic_4), the dominant topic, and
# its percentage (visible in the output of the next cell).
df_lda = assign_model(lda)
INFO - (13390, 15)
INFO - assign_model() succesfully completed......................................
In [29]:
# Preview the labeled frame: topic weights, Dominant_Topic, Perc_Dominant_Topic
df_lda.head()
Out[29]:
tweet_id author_id inbound created_at text response_tweet_id in_response_to_tweet_id sentiment Topic_0 Topic_1 Topic_2 Topic_3 Topic_4 Dominant_Topic Perc_Dominant_Topic
0 2 115712 True 2017-10-31 22:11:45 propose NaN 1.0 0.0000 0.033336 0.034517 0.033592 0.034114 0.864442 Topic 4 0.86
1 3 115712 True 2017-10-31 22:08:27 send several private message respond usual 1 4.0 -0.2960 0.022252 0.137602 0.795204 0.022235 0.022707 Topic 2 0.80
2 5 115712 True 2017-10-31 21:49:35 4 6.0 0.0000 0.161964 0.787537 0.016796 0.016915 0.016788 Topic 1 0.79
3 8 115712 True 2017-10-31 21:45:10 bad customer service 9,6,10 NaN -0.6249 0.432980 0.505775 0.020438 0.020565 0.020242 Topic 1 0.51
4 12 115713 True 2017-10-31 22:04:47 go magically change connectivity whole family 11,13,14 15.0 0.0000 0.050208 0.050001 0.505127 0.343875 0.050790 Topic 2 0.51
In [27]:
# Visualize the LDA model from several angles (same figures, same order
# as before — grouped into loops for readability).
for overview_plot in ('topic_distribution', 'topic_model'):
    plot_model(lda, plot=overview_plot)

# Deep-dive plots for a single topic of interest
for topic_plot in ('wordcloud', 'frequency', 'bigram',
                   'trigram', 'distribution', 'sentiment'):
    plot_model(lda, plot=topic_plot, topic_num='Topic 3')

# t-SNE projection of all documents in topic space
plot_model(lda, plot='tsne')
plot_model(lda, plot='tsne')
INFO - Initializing plot_model()
INFO - plot_model(model=LdaModel(num_terms=5064, num_topics=5, decay=0.5, chunksize=100), plot=topic_distribution, topic_num=None, save=False, system=True)
INFO - Topic selected. topic_num : Topic 0
INFO - Checking exceptions
INFO - Importing libraries
INFO - save_param set to False
INFO - plot type: topic_distribution
INFO - SubProcess assign_model() called ==================================
INFO - Initializing assign_model()
INFO - assign_model(model=LdaModel(num_terms=5064, num_topics=5, decay=0.5, chunksize=100), verbose=False)
INFO - Determining model type
INFO - model type: lda
INFO - Checking exceptions
INFO - Preloading libraries
INFO - Preparing display monitor
IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

INFO - (13390, 15)
INFO - assign_model() succesfully completed......................................
INFO - SubProcess assign_model() end ==================================
INFO - Sorting Dataframe
INFO - Rendering Visual
INFO - Visual Rendered Successfully
INFO - plot_model() succesfully completed......................................
INFO - Initializing plot_model()
INFO - plot_model(model=LdaModel(num_terms=5064, num_topics=5, decay=0.5, chunksize=100), plot=topic_model, topic_num=None, save=False, system=True)
INFO - Topic selected. topic_num : Topic 0
INFO - Checking exceptions
INFO - Importing libraries
INFO - save_param set to False
INFO - plot type: topic_model
INFO - Preparing pyLDAvis visual
INFO - Visual Rendered Successfully
INFO - plot_model() succesfully completed......................................
INFO - Initializing plot_model()
INFO - plot_model(model=LdaModel(num_terms=5064, num_topics=5, decay=0.5, chunksize=100), plot=wordcloud, topic_num=Topic 3, save=False, system=True)
INFO - Checking exceptions
INFO - Importing libraries
INFO - save_param set to False
INFO - plot type: wordcloud
INFO - SubProcess assign_model() called ==================================
INFO - Initializing assign_model()
INFO - assign_model(model=LdaModel(num_terms=5064, num_topics=5, decay=0.5, chunksize=100), verbose=False)
INFO - Determining model type
INFO - model type: lda
INFO - Checking exceptions
INFO - Preloading libraries
INFO - Preparing display monitor
IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

INFO - (13390, 15)
INFO - assign_model() succesfully completed......................................
INFO - SubProcess assign_model() end ==================================
INFO - Fitting WordCloud()
INFO - Rendering Visual
INFO - Visual Rendered Successfully
INFO - plot_model() succesfully completed......................................
INFO - Initializing plot_model()
INFO - plot_model(model=LdaModel(num_terms=5064, num_topics=5, decay=0.5, chunksize=100), plot=frequency, topic_num=Topic 3, save=False, system=True)
INFO - Checking exceptions
INFO - Importing libraries
INFO - save_param set to False
INFO - plot type: frequency
INFO - Rendering Visual
INFO - SubProcess assign_model() called ==================================
INFO - Initializing assign_model()
INFO - assign_model(model=LdaModel(num_terms=5064, num_topics=5, decay=0.5, chunksize=100), verbose=False)
INFO - Determining model type
INFO - model type: lda
INFO - Checking exceptions
INFO - Preloading libraries
INFO - Preparing display monitor
IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

INFO - (13390, 15)
INFO - assign_model() succesfully completed......................................
INFO - SubProcess assign_model() end ==================================
INFO - Fitting CountVectorizer()
INFO - Visual Rendered Successfully
INFO - plot_model() succesfully completed......................................
INFO - Initializing plot_model()
INFO - plot_model(model=LdaModel(num_terms=5064, num_topics=5, decay=0.5, chunksize=100), plot=bigram, topic_num=Topic 3, save=False, system=True)
INFO - Checking exceptions
INFO - Importing libraries
INFO - save_param set to False
INFO - plot type: bigram
INFO - SubProcess assign_model() called ==================================
INFO - Initializing assign_model()
INFO - assign_model(model=LdaModel(num_terms=5064, num_topics=5, decay=0.5, chunksize=100), verbose=False)
INFO - Determining model type
INFO - model type: lda
INFO - Checking exceptions
INFO - Preloading libraries
INFO - Preparing display monitor
IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

INFO - (13390, 15)
INFO - assign_model() succesfully completed......................................
INFO - SubProcess assign_model() end ==================================
INFO - Fitting CountVectorizer()
INFO - Rendering Visual
INFO - Visual Rendered Successfully
INFO - plot_model() succesfully completed......................................
INFO - Initializing plot_model()
INFO - plot_model(model=LdaModel(num_terms=5064, num_topics=5, decay=0.5, chunksize=100), plot=trigram, topic_num=Topic 3, save=False, system=True)
INFO - Checking exceptions
INFO - Importing libraries
INFO - save_param set to False
INFO - plot type: trigram
INFO - SubProcess assign_model() called ==================================
INFO - Initializing assign_model()
INFO - assign_model(model=LdaModel(num_terms=5064, num_topics=5, decay=0.5, chunksize=100), verbose=False)
INFO - Determining model type
INFO - model type: lda
INFO - Checking exceptions
INFO - Preloading libraries
INFO - Preparing display monitor
IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

INFO - (13390, 15)
INFO - assign_model() succesfully completed......................................
INFO - SubProcess assign_model() end ==================================
INFO - Fitting CountVectorizer()
INFO - Rendering Visual
INFO - Visual Rendered Successfully
INFO - plot_model() succesfully completed......................................
INFO - Initializing plot_model()
INFO - plot_model(model=LdaModel(num_terms=5064, num_topics=5, decay=0.5, chunksize=100), plot=distribution, topic_num=Topic 3, save=False, system=True)
INFO - Checking exceptions
INFO - Importing libraries
INFO - save_param set to False
INFO - plot type: distribution
INFO - SubProcess assign_model() called ==================================
INFO - Initializing assign_model()
INFO - assign_model(model=LdaModel(num_terms=5064, num_topics=5, decay=0.5, chunksize=100), verbose=False)
INFO - Determining model type
INFO - model type: lda
INFO - Checking exceptions
INFO - Preloading libraries
INFO - Preparing display monitor
IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

INFO - (13390, 15)
INFO - assign_model() succesfully completed......................................
INFO - SubProcess assign_model() end ==================================
INFO - Rendering Visual
INFO - Visual Rendered Successfully
INFO - plot_model() succesfully completed......................................
INFO - Initializing plot_model()
INFO - plot_model(model=LdaModel(num_terms=5064, num_topics=5, decay=0.5, chunksize=100), plot=sentiment, topic_num=Topic 3, save=False, system=True)
INFO - Checking exceptions
INFO - Importing libraries
INFO - save_param set to False
INFO - plot type: sentiment
INFO - SubProcess assign_model() called ==================================
INFO - Initializing assign_model()
INFO - assign_model(model=LdaModel(num_terms=5064, num_topics=5, decay=0.5, chunksize=100), verbose=False)
INFO - Determining model type
INFO - model type: lda
INFO - Checking exceptions
INFO - Preloading libraries
INFO - Preparing display monitor
IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

INFO - (13390, 15)
INFO - assign_model() succesfully completed......................................
INFO - SubProcess assign_model() end ==================================
INFO - Fitting TSNE()
INFO - Sorting Dataframe
INFO - Rendering Visual
INFO - Visual Rendered Successfully
INFO - plot_model() succesfully completed......................................
In [28]:
# Interactive exploration of the trained model's plots (pycaret widget)
evaluate_model(lda)
In [ ]:
 
In [ ]:
 
In [ ]: